{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# First part:  a simple instruction on how to apply the feasibility predictor. The main steps include:<br>\n",
    "* Load the feasibility predictor\n",
    "* Parse a packing instance\n",
    "* Pass the feature of the instance to the predictor and obtain the prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch as T\n",
    "import pickle as pkl\n",
    "import numpy as np\n",
    "import random\n",
    "from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler\n",
    "import torch.nn as nn\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1. Load the feasibility predictor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictor = T.jit.load('Feasibility_Predictor.pt')\n",
    "device = T.device(\"cuda\" if T.cuda.is_available() else \"cpu\")\n",
    "predictor  = predictor.to(device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step2. Parse a packing instance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_manual_feature(items, bin_width, bin_height):\n",
    "    \"\"\"\n",
    "    The method is to extract features from a set of items\n",
    "    Parameters:\n",
    "    ----------\n",
    "    items (a list of np.arrays)\n",
    "    bin_width (int): the width of a bin\n",
    "    bin_height (int): the height of a bin\n",
    "    Returns (a list of metrics)\n",
    "    -------\n",
    "    notes:\n",
    "    we extract five types of features:\n",
    "    1) the ratio between width and height, and four statistical metrics (mean, min, max, std) as the features\n",
    "    2) the ratio between width and the bin width, four statistical metrics (mean, min, max, std) as the features\n",
    "    3) the ratio between height and the bin height, four statistical metrics (mean, min, max, std) as the features\n",
    "    4) the ratio between area of a item and the bin capacity, four statistical metrics (mean, min, max, std) as the features\n",
    "    5) the ratio between total area of the items and the bin capacity, a single metric\n",
    "    \"\"\"\n",
    "    MAX_W_H_RATIO = 18\n",
    "    capacity = bin_width * bin_height\n",
    "    w_h_ratios = np.asarray(list(map(lambda x: x[0] / x[1], items))) / MAX_W_H_RATIO\n",
    "    w_bin_ratios = np.asarray(list(map(lambda x: x[0] / bin_width, items)))\n",
    "    h_bin_ratios = np.asarray(list(map(lambda x: x[1] / bin_height, items)))\n",
    "    area_capacity_ratios = np.asarray(list(map(lambda x: (x[0] * x[1]) / capacity, items)))\n",
    "    total_area = np.asarray(list(map(lambda x: x[0] * x[1], items))).sum()\n",
    "    w_h_features = [w_h_ratios.mean(), w_h_ratios.min(), w_h_ratios.max(), w_h_ratios.std()]\n",
    "    w_bin_features = [w_bin_ratios.mean(), w_bin_ratios.min(), w_bin_ratios.max(), w_bin_ratios.std()]\n",
    "    h_bin_features = [h_bin_ratios.mean(), h_bin_ratios.min(), h_bin_ratios.max(), h_bin_ratios.std()]\n",
    "    area_capacity_features = [area_capacity_ratios.mean(), area_capacity_ratios.min(),\n",
    "                              area_capacity_ratios.max(), area_capacity_ratios.std()]\n",
    "    total_area_capacity_features = [total_area / capacity]\n",
    "    extracted_features = [w_h_features, w_bin_features, h_bin_features, area_capacity_features,\n",
    "                          total_area_capacity_features]\n",
    "    result = []\n",
    "    for x in extracted_features:\n",
    "        result.extend(x)\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Demo instance A, result: infeasible \n",
    "# width, height\n",
    "# 6,4\n",
    "# 17,1\n",
    "# 8,1\n",
    "# 15,2\n",
    "# 2,6\n",
    "# 13,1\n",
    "# 4,7\n",
    "# 7,4\n",
    "# 6,4\n",
    "# 2,5\n",
    "\n",
    "# Demo instance B, result: feasible:\n",
    "# 2,9\n",
    "# 3,4\n",
    "# 2,6\n",
    "# 13,1\n",
    "# 4,7\n",
    "# 7,4\n",
    "# 6,4\n",
    "# 2,5\n",
    "instance = [\n",
    "           # width, height\n",
    "            [2,9], \n",
    "            [3,4],\n",
    "            [2,6],\n",
    "            [13,1],\n",
    "            [4,7],\n",
    "            [7,4],\n",
    "            [6,4],\n",
    "            [2,5]\n",
    "            ]\n",
    "features = extract_manual_feature(instance, bin_width = 20, bin_height = 10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step3. Pass the feature of the instance to the predictor and obtain the prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RecursiveScriptModule(\n",
       "  original_name=FFNNFeasibilityChecker\n",
       "  (linear1): RecursiveScriptModule(original_name=Linear)\n",
       "  (linear2): RecursiveScriptModule(original_name=Linear)\n",
       "  (linear3): RecursiveScriptModule(original_name=Linear)\n",
       ")"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictor.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([0.1287, 0.0123, 0.7222, 0.2262, 0.2438, 0.1000, 0.6500, 0.1775, 0.5000,\n",
       "        0.1000, 0.9000, 0.2236, 0.0906, 0.0500, 0.1400, 0.0352, 0.7250],\n",
       "       device='cuda:0')"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features = T.tensor(features, dtype=T.float).to(device)\n",
    "features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "feasible\n"
     ]
    }
   ],
   "source": [
    "with T.no_grad():\n",
    "    y = predictor(features)\n",
    "y\n",
    "if y > 0.5:\n",
    "    print(\"infeasible\")\n",
    "else:\n",
    "    print(\"feasible\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Second part: an instruction on how to parse a .pkl file and run the feasibility predictor on testing samples.\n",
    "\n",
    "The shared training, testing samples are in .pkl format. We present the right codes to parse them and apply the predictor on the testing samples to reproduce the results shown by Figure 5 in the paper. One could also train a new predictor with a different architecture by the shared training samples with the codes."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## unpickled the .pkl files\n",
    "The testing samples and the training samples are both stored in .pkl format, which is a highly compact format for storing large dataset. The following snippet can be used to unpickled the files and access the data correctly.<br>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What is inside a .pkl file?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read the demo samples\n",
    "with open(\"demo_samples.pkl\", \"rb\") as fp:\n",
    "    demo_dataset = pkl.load(fp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inside a .pkl file, samples are organized as in the defaultdict format. The key of the defaultdict is the id of each sample. The corresponding value is a sub-dict which contains the attributes of the sample.<br> \n",
    "Take the sample with \"id = 24900\" in the dataset as an example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'key': '2L_VRPTW-25-R1-55-PC5_Batch4.csv21_0.tr',\n",
       " 'label': 0,\n",
       " 'bin': array([20, 10]),\n",
       " 'items': array([[2, 4],\n",
       "        [4, 2],\n",
       "        [3, 3],\n",
       "        [3, 3],\n",
       "        [2, 5],\n",
       "        [4, 2],\n",
       "        [4, 1],\n",
       "        [5, 2],\n",
       "        [3, 1],\n",
       "        [6, 2],\n",
       "        [3, 2],\n",
       "        [2, 2],\n",
       "        [2, 2]]),\n",
       " 'packing_class': '5'}"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "demo_dataset[24900]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* key: the unique id of the sample\n",
    "* label: 0 -> feasible, 1 -> infeasible\n",
    "* bin: the size of the bin, [width, height]\n",
    "* items: a list of items, [[width_1, height_1],..[width_m, height_m]]\n",
    "* packing_class: specify which packing class it belongs to"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Customized dataset\n",
    "Pytorch provides an abstract classic to represent samples for training and testing. One could click the link for more information.\n",
    "https://pytorch.org/tutorials/beginner/data_loading_tutorial.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ItemDataset(Dataset):\n",
    "    def __init__(self, dataset_file_path):\n",
    "        super(ItemDataset, self).__init__()\n",
    "        # open the .pkl file, the dataset \n",
    "        with open(dataset_file_path, \"rb\") as fp:\n",
    "            dataset = pkl.load(fp)\n",
    "        self.dataset = {k: v for k, v in dataset.items() if v['label'] == 1 or v['label'] == 0}\n",
    "    def __len__(self):\n",
    "        return len(self.dataset.keys())\n",
    "\n",
    "    def __getitem__(self, i):\n",
    "        sample_idx = list(self.dataset.keys())[i]\n",
    "        sample = self.dataset[sample_idx]\n",
    "        items, label = sample['items'], sample['label']\n",
    "        max_width, max_height = sample['bin']\n",
    "        manual_features = np.asarray(extract_manual_feature(items, max_width, max_height))\n",
    "        return manual_features, label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare data loader for the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_loaders(dataset, samplers):\n",
    "    test_loader = DataLoader(dataset, batch_size=1,\n",
    "                             sampler=samplers[\"test\"])\n",
    "    return test_loader\n",
    "def create_samplers(dataset, val_size=3000, split=True):\n",
    "    \"\"\"Create samplers to randomly sample from the dataset\n",
    "    \"\"\"\n",
    "    dataset_idxs = list(range(len(dataset)))\n",
    "    return {\"test\": SubsetRandomSampler(dataset_idxs)}\n",
    "def inference_mode(model, test_loader, criterion, device):\n",
    "    test_loss, test_acc, all_y, all_pred = eval(model, test_loader, criterion, device)\n",
    "    print(f\"Test loss {test_loss:.3f} Test acc {test_acc:.3f}\")\n",
    "    all_y = [x.item() for x in all_y]\n",
    "    all_pred = [x.item() for x in all_pred]\n",
    "    print(confusion_matrix(all_y, all_pred))\n",
    "def eval(model, dataloader, criterion, device):\n",
    "    model.eval()\n",
    "    test_loss = 0\n",
    "    correct = 0\n",
    "    n_sample = 0\n",
    "    all_y = []\n",
    "    all_pred = []\n",
    "    with T.no_grad():\n",
    "        for batch in dataloader:\n",
    "            x, y = batch\n",
    "            x, y = x.float().to(device), y.float().to(device)\n",
    "            y_pred = model(x)\n",
    "            y_pred = y_pred.reshape(-1)\n",
    "            loss = criterion(y_pred, y)\n",
    "            all_y.append(y)\n",
    "            all_pred.append((y_pred >= 0.5).float())\n",
    "            test_loss += (x.shape[0] * loss.item())\n",
    "            correct += ((y_pred >= 0.5).float() == y).sum().item()\n",
    "            n_sample += x.shape[0]\n",
    "    return test_loss / n_sample, correct / n_sample, all_y, all_pred"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Perform inference on the testing samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(dataset_file_path):\n",
    "    dataset = ItemDataset(dataset_file_path)\n",
    "    samplers = create_samplers(dataset, split=False)\n",
    "    test_loader = create_loaders(dataset, samplers)\n",
    "    # feature_train_loader\n",
    "    # Load model\n",
    "    device = T.device(\"cuda\" if T.cuda.is_available() else \"cpu\")\n",
    "    print(\"Loading the model\")\n",
    "    model = T.jit.load('Feasibility_Predictor.pt')\n",
    "    model = model.to(device)\n",
    "    criterion = nn.BCELoss()\n",
    "    # Train or Infer\n",
    "    inference_mode(model,test_loader,criterion,device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### We run test the model over the hybrid testing samples which includes instances of all the packing classes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading the model\n",
      "Test loss 0.121 Test acc 0.958\n",
      "[[19773  1074]\n",
      " [  662 20185]]\n"
     ]
    }
   ],
   "source": [
    "main(dataset_file_path = \"./testing_samples/HybridClasses.pkl\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Then the model is run over the testing samples of packing class 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading the model\n",
      "Test loss 0.268 Test acc 0.882\n",
      "[[5219 1275]\n",
      " [ 176 5588]]\n"
     ]
    }
   ],
   "source": [
    "main(dataset_file_path = \"./testing_samples/PackingClass2.pkl\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Then the model is run over the testing samples of packing class 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading the model\n",
      "Test loss 0.153 Test acc 0.945\n",
      "[[4773  214]\n",
      " [ 337 4650]]\n"
     ]
    }
   ],
   "source": [
    "main(dataset_file_path = \"./testing_samples/PackingClass3.pkl\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### The the model is run over the testing samples of packing class 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading the model\n",
      "Test loss 0.235 Test acc 0.907\n",
      "[[207   3]\n",
      " [ 36 174]]\n"
     ]
    }
   ],
   "source": [
    "main(dataset_file_path = \"./testing_samples/PackingClass4.pkl\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### The the model is run over the testing samples of packing class 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading the model\n",
      "Test loss 0.009 Test acc 1.000\n",
      "[[62720    14]\n",
      " [    0     0]]\n"
     ]
    }
   ],
   "source": [
    "main(dataset_file_path = \"./testing_samples/PackingClass5.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python (ml)",
   "language": "python",
   "name": "ml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
